*****************************************************************
* Replication directory for                                   ***
* Prime locations                                             ***
* by Gabriel M. Ahlfeldt, Thilo N.H. Albers, Kristian Behrens ***
* Published in American Economic Review: Insights             ***
*****************************************************************
* 01/2025
* Stata
version 17.0

* This do files uses clusters generated in C++ (lines 2-17 of Algorithm 1) to 
* complete the deliniation of prime locations (lines 18-34 of Algorithm 1) in US MSAs
* This do file is run for various p-values (defined by gloabl $SL)
* The preferred outputs will be chosen later
	
* Create temporary space for output
	capture mkdir "$temp/grid_PL_output_p${SL}"	
	
* Loop over CBSAs
	qui u "$data_USMETROS/Raw Numeric Data/METRO LIST/METROS.dta", clear	
	qui tab cbsafp
	local Ncbsafp = r(N) // Total number of cities in list
	levelsof  cbsafp, local(USMETROIDS)	
	local count = 1
	
* Begin loop by metro 
			foreach USmid of local USMETROIDS{	
			di "...Working on CBSA `USmid', number `count' of `Ncbsafp'..."
			u "$dataoutput/USMetroGridEmp/pval_${SL}/EmpGrid_`USmid'", clear	// Using p-value defined by global macro $SL
			* Fix special cases
				 qui sum clusterID if clusterID != . // If there are no clusters
					local MaxCluster = r(max) 
					qui sum grid_x 
					local mingridX = round(r(min),0)
					qui sum grid_y if grid_x ==  `mingridX'
					local mingridY = round(r(min),0)
					if `MaxCluster' == 0 {
						 qui replace clusterID = 1 if grid_x == `mingridX' & grid_y == `mingridY'
						 qui replace developable = 1 if grid_x == `mingridX' & grid_y == `mingridY'
						 qui replace cell_total_emp = 999 if grid_x == `mingridX' & grid_y == `mingridY'
					}
					else {
						}
				qui sum clusterID if clusterID != .  // If there are too many clusters
					local MaxCluster = r(max) 
					qui sum grid_x 
					local mingridX = round(r(min),0)
					qui sum grid_y if grid_x ==  `mingridX'
					local mingridY = round(r(min),0)
					if `MaxCluster' > 50 {
						 qui replace clusterID = 0
						 qui replace clusterID = 1 if grid_x == `mingridX' & grid_y == `mingridY'
						 qui replace developable = 1 if grid_x == `mingridX' & grid_y == `mingridY'
						 qui replace cell_total_emp = 999 if grid_x == `mingridX' & grid_y == `mingridY'
					}
					else {
						}
			
			
			* DATA PREP ********************************************************
				qui ren cell_total_emp emp
			* Cluster employment relative to max PL employment in city
				qui gen TrimmedCluster = clusterID > 0							// Trimmed cluster variable
				qui egen Cemp = sum(emp), by(metro_id clusterID)
				qui egen Carea = count(emp), by(metro_id clusterID)
				qui gen Cempdens = 	Cemp  / Carea	
				qui egen CempMax = max(Cemp) if clusterID != 0, by(metro_id)	// Employment of largest cluster in metro
				qui gen CempRel = Cemp/CempMax 									// Cluster employment relative to largest cluster in metro
				qui replace TrimmedCluster = 0 if CempRel < $PLtresh			// Restrict to clusters with minimum relative employment

			* Ceneterate trimmed cluster variables	
				qui gen TrimmedClusterID = clusterID*TrimmedCluster if clusterID > 0 // Trimmed clusters identified
				qui egen TrimmedClusterX = mean(grid_x) if TrimmedCluster > 0, by(TrimmedClusterID metro_id)
				qui egen TrimmedClusterY = mean(grid_y) if TrimmedCluster > 0, by(TrimmedClusterID metro_id)		
	
			* AGGREGATION ******************************************************
				qui sum metro_id
				global MetroMin=r(min)
				global MetroMax=r(max)
				foreach num of numlist $MetroMin / $MetroMax { 					// Loop over cities
				local O = 999
				local i = 1
				while `O' > 0 {
				display "...round `i' of city `num'..."
				qui gen TrimmedClusterIDold = TrimmedClusterID // save old ID
				AGGCLUSTER2 `num' $DISTtresh
				qui gen test = abs(TrimmedClusterIDold-TrimmedClusterID) 		// generate objective
				qui sum test
				local O = r(sum)
				qui drop TrimmedClusterIDold test
				local i = `i'+1
				}
				}
				* Generate PL ID
				capture gen PLID = .											// gen PL ID variable
				qui replace PLID = TrimmedClusterID 

			* GENERATING CONVEX HULL *******************************************
				foreach num of numlist $MetroMin / $MetroMax { 					// loop over metros
				levelsof PLID if metro_id == `num', local(levels) 				// get list of PL IDs in a metro
				foreach pl of numlist `levels' {								// loop over PL IDs
					CH `num' `pl' 2
				}																// PL loop closes
				}																// metro loop closes
				
				* Clean temp folder 
				preserve 
					filelist, dir("$temp/PLtemp")
					levelsof filename, local(files)
					qui foreach f of local files{
						erase "$temp/PLtemp/`f'"
					}
				restore 
				
			* SPLIT PLs if parts are non-contiguous ****************************
				SPLITTER $SplitDist	
				
			* DISCONNECT PLs if they are too large *****************************
			foreach num of numlist $MetroMin / $MetroMax { 						// loop over metros
				levelsof PLID if metro_id == `num', local(levels) 				// get list of PL IDs in a metro
				foreach pl of numlist `levels' {								// loop over PL IDs
					DISCONNECT `pl' $DisconnectDISTthresh	
				}																// PL loop closes
				}	
									
			* Generate PL variables	gen PL = PLID>0 & PLID != .
				display "...finalizing city..."
				qui gen PL = PLID>0 & PLID != .
				qui gen PL_x = grid_x if PL == 1
				qui gen PL_y = grid_y if PL == 1
				qui egen PL_emp = sum(emp) if PL == 1,by(metro_id PLID)
				qui egen PL_area = sum(area_g) if PL == 1,by(metro_id PLID)
				qui gen PL_empdens = PL_emp / PL_area
				qui egen PLX = mean(grid_x) if PLID > 0 & PLID != ., by(PLID metro_id)
				qui egen PLY = mean(grid_y) if PLID > 0 & PLID != ., by(PLID metro_id)		
				qui egen metro_emp = sum(emp) ,by(metro_id )

			* Flag as pseudo-PLs an drop unless the only one in city ***********
				qui gen PPL = PL_emp < 10000  									// If PL has not enough employment
			preserve // Compute rank
				qui keep PL PPL PLID cbsafp PL_emp
					qui drop if PL == 0
					qui duplicates drop
					qui replace PL_emp= PL_emp+runiform()-0.5					// add small random employment to break potential ties
					qui egen PLrankByCity = rank(PL_emp), by(cbsafp) field 
					qui save "$temp/PLID_metro_rank_temp.dta", replace
				restore
				qui merge m:1 PLID cbsafp using "$temp/PLID_metro_rank_temp.dta", keepusing(PLrankByCity)	
				qui drop _m
				qui erase "$temp/PLID_metro_rank_temp.dta"
				qui  foreach var of varlist PL* {								// remove pseudo PLs, except if they are the larges PPL in the metro
					qui replace `var' = . if PPL == 1 & PLrankByCity != 1
				}
				qui replace PL = 0 if PL == .
				qui drop PL_emp // Now need to update PL_emp
				qui egen PL_emp = sum(emp) if PL == 1, by(cbsafp PLID)
				qui egen emp_insidePL = sum(emp) if PL == 1, by(cbsafp)
				
			* Generate employments share within and outside PLs
				foreach ind in MFG NT PS TS O ST {
				qui egen `ind'emp_insidePL = sum(cell_`ind'_emp) if PL == 1, by(cbsafp)
				qui egen `ind'emp_outsidePL = sum(cell_`ind'_emp) if PL == 0, by(cbsafp)
				foreach name in insidePL outsidePL {
					egen temp = mean(`ind'emp_`name'), by(cbsafp)
					drop `ind'emp_`name'
					ren temp `ind'emp_`name'
				}
				qui gen `ind'empshare_insidePL = `ind'emp_insidePL / emp_insidePL * 100
					label var `ind'empshare_insidePL "`ind' share within PLs (%)"
				qui gen `ind'empshare_outsidePL = `ind'emp_outsidePL / (metro_emp-emp_insidePL) *100
					label var `ind'empshare_outsidePL "`ind' share outside PLs (%)"
			}
			* MAP PL
				local title = metro_name[1]
				twoway  (scatter grid_y grid_x , msymbol(s) mlcolor(none) msize(tiny) mcolor(black*0.25)) ///
						(scatter grid_y grid_x if developable == 1, msymbol(s) mlcolor(none) msize(tiny) mcolor(black*0.4)) ///
						(scatter grid_y grid_x if emp > 0 , msymbol(s) mlcolor(none) msize(tiny) mcolor(black*0.75)) ///
						(scatter grid_y grid_x if  PL == 1 , msize(tiny)  mlcolor(none)  msymbol(s) mcolor(red*0.5)) ///
						(scatter PLY PLX , msize(tiny) msymbol(s)  mlcolor(none)  mcolor(red) mlabel(PLrankByCity) mlabcolor(red)) ///
						, scheme(s1mono) legend(off) xsize(5) ysize(5) xlabel(0[70000]70000)		ylabel(0[70000]70000) graphregion(color(white))	title("`title'") 
				* Generate simple maps for quick inspections - not the final maps in the paper and toolkit!
				capture mkdir "$temp/figures_App"
				capture mkdir "$temp/figures_App/US-CBSAs"
				capture mkdir "$temp/figures_App/US-CBSAs/p${SL}"
				capture mkdir "$temp/figures_App/US-CBSAs/p${SL}/MAPS"
				qui graph export "$temp/figures_App/US-CBSAs/p${SL}/MAPS/MAPS_PL_TotalEmp_BaseP_`USmid'.png", replace height(1000) width(1000)
			
			* Save outputs *****************************************************
				qui drop TrimmedCluster Cemp Carea Cempdens CempMax CempRel TrimmedClusterID TrimmedClusterX TrimmedCluster
				qui save "$temp/grid_PL_output_p${SL}/grid_PL_output_p${SL}_`USmid'.dta", replace
				qui keep if PL == 1
				qui tostring cbsafp, gen(cbsa)
				qui tostring PLrankByCity , gen(PLIDstring)
				qui gen PLID_US = cbsa+"_"+PLIDstring
				qui drop PLIDstring cbsa
				qui save "$temp/grid_PL_output_p${SL}/PL_grids_p${SL}_`USmid'", replace				
				qui keep emp PLID PLID_US  lon_UL lat_UL lon_LR lat_LR cbsafp PLrankByCity flag_DCPL
				qui export delimited "$temp/grid_PL_output_p${SL}/PL_grids_p${SL}_`USmid'.csv", replace
			*Reset counter
				local count = `count' +1
			}
	* Loop ends
	
* Append outputs of loops into metro-grid files
		display "...compiling employment metro-grid data..."
		display `USMETROIDS'
		clear
		foreach USmid of local USMETROIDS{
			qui append using "$temp/grid_PL_output_p${SL}/grid_PL_output_p${SL}_`USmid'.dta"
			}	
			qui save "$temp/grid_PL_output_p${SL}/grid_PL_output_p${SL}__all.dta", replace
		clear 
		foreach USmid of local USMETROIDS{
			qui append using "$temp/grid_PL_output_p${SL}/PL_grids_p${SL}_`USmid'"
			}	
			qui keep if PL == 1
			qui save "$temp/grid_PL_output_p${SL}/PL_grids_p${SL}__all.dta", replace
			
* Script ends
